% This toolbox is used to process GHCNmV4 data and convert coastal GHCNmV4
% air temperature (SATs) into near-coast sea-surface temperatures (SSTs)
% following Chan et al (JClim, 2022).
%
% Ref: Chan, Gebbie, & Huybers (2022).   Global and Regional Discrepancies 
% between Early 20th Century Coastal Air and Sea-Surface Temperature Detected 
% by a Coupled Energy-Balance Analysis.  Journal of Climate (in press).

% =========================================================================
% Before getting started, please make sure that you download the
% CD_Computation tool box from "https://github.com/duochanatharvard/CD_Computation"
% and add it to Matlab's Path.

% Also, please set up input and output directories in "SAT_SST_func_IO.m"

% Before doing the analysis, run the following code to preprocess GHCNmV4
% data from ASCII to mat format, and also pick out coastal stations
% The whole process takes less than 10 minutes on a 2019 Macbook Pro.

clear; 
for do_QC = [1 0]  % 1: qcf   0:qcu
    SAT_SST_pre_processing_step_01_Station_T_csv2mat;
    SAT_SST_pre_processing_step_02_find_coastal_island;
end

% =========================================================================
% Below are codes for converting SATs into SSTs
% First Let's set up some parameters, 
% to run the main analysis and a variety of sensitivity tests in Chan et al (JClim 2022)
% We have wrapped the code such that you only need to change "case_id" and "ct_app"
clear;
case_id = 1;  % 1-3: Using observations, among which: 
              % main analysis: 1 [throuhold of neighbors (TON) == 300km]
              % sensitivity: 2 [TON == 200km] and 3 [TON == 400km]
              % 4-20: each for one from 17 CMIP6 models 
              % [regridded 1 degree simulations for 17 CMIP6 models is ~27GB, 
              % which is too large for purpose of this archive, we instead
              % provide subsampled CMIP6 results instead.  
              % Raw regridded data can be accessed through Globus at Harvard 
              % share point, and we will provide access upon request.]
              
ct_app  = 1;  % Main analysis: 1; sensitivity using qcu: 2 - 3

% Below are the parameters used in the development of the code, you do not 
% need to modify them to reproduce results in Chan et al (JClim 2022), but
% please feel free to play with them if you would like to explore the
% results in a more comprehensive manner.
clear('P')
switch ct_app
    case 1, P.app = 'qcf';  P.use_qcf_para = 1;       % Main analysis
    case 2, P.app = 'qcu';  P.use_qcf_para = 1;
    case 3, P.app = 'qcu';  P.use_qcf_para = 0;
end

if case_id <= 3  % Observational analysis
    P.source       = 'OBS';
    switch case_id
        case 1, P.distance = 300;                     % Main analysis
        case 2, P.distance = 200;
        case 3, P.distance = 400;
    end
else             % CMIP6 analysis
    P.model_id     = case_id - 3;
    P.source       = ['CMIP6_model_',num2str(P.model_id)];
    P.distance     = 300; 
end

P.yr_sub_st        = 1850;  % Subset data within this interval for analysis
P.yr_sub_ed        = 2020;  % Subset data within this interval for analysis
P.yr_learn         = 1960:2020;  % use this interval to train model para.
P.date             = SAT_SST_func_IO('date');   % Version of GHCN data used
P.key              = 0;     % Use stations whose temperatures are  
                            %    positively correlated with nearby SSTs
P.use_mask         = 0;     % Do not use gridded land mask to identify 
                            %    coastal stations. Rather, only distance to 
                            %    the nearest coast is used.
P.do_gradual_match = 1;     % This is a fixed parameter
P.cal_anomalies    = -1;    % This is a fixed parameter 
                            %    (-1: direct calculate anomalies without any fitting)
P.do_random        = 1;     % Perturb HadSST4? This parameter is only 
                            %    used when comparing with HadSST4 ensemble.
                            %    For the main analysis, setting it to 1 or
                            %    0 does not affect the computation
P.do_season        = 0;     % The model does not account for seasonality in
                            %    A, B, and C parameters

% Setup the directory =====================================================
SAT_SST_func_IO('data',P);

% Run the analysis ========================================================
% Step 1. Subset data and pair with SSTs (20s)
if strcmp(P.source,'OBS')
    SAT_SST_Step_01_pair_with_HadSST4(P);
    % We provide median estimates of HadSST4 for reproducing main and
    % sensitivity analysis in the main text.
    
    % Uncomment the following if you need to reproduce Fig.~10 and 11, 
    % for which calculating uncertainties of HadSST4 is necessary.
    % SAT_SST_Step_01_pair_with_HadSST4_en_and_raw(P);
    % Note that:
    % > Files of each member should be placed in a folder called
    %   "HadSST_ensemble_v1" under the HadSST4 directory
    % > In addition, to perturb each member to account for sampling, random
    %   and correlated observational errors, associated error files needs
    %   to be downloaded.  
    %       HadSST.4.0.1.0_sampling_uncertainty.nc
    %       HadSST.4.0.1.0_uncorrelated_measurement_uncertainty.nc
    %       HadSST.4.0.1.0_error_covariance_yyyymm.nc
    %   Among which, error covariance files should be placed in a folder 
    %   called "HadSST_error_covariance" under the HadSST4 directory
    % The total file is about 4GB and we point users to HadSST4 download page: 
    %   https://www.metoffice.gov.uk/hadobs/hadsst4/data/download.html
    
elseif strcmp(P.source(1:5),'CMIP6')
    SAT_SST_Step_01_pair_with_CMIP6(P);
end

% Step 2. Calculate the anomalies of raw SAT (160s) 
SAT_SST_Step_02_calculate_anomalies_for_raw_SAT(P);

% Step 3-0. Calculate parameters A, B, and C for BB98d (Eq. 2 in the main text; 420s)
% Output of this step is a 36x15x3 matrix containing A, B, and C parameters
% These parameters are ordered in B, C, A along the third dimension.
% Also, these parameters are scaled by 2e8 to speed up optimization.
SAT_SST_Step_03_get_scaling(P);

% Step 3-1. Calculate the validation error of the fitting (520s)
SAT_SST_Step_03_get_scaling_validation(P);

% Step 3-2. stationarity of parameters using CMIP6 (To be checked)
if ~strcmp(P.source,'OBS')  
    PP = P;    PP.yr_learn = [1900:1960];
    SAT_SST_Step_03_get_scaling(PP);
end

% Step 4. Scale near-coast SATs to get inferred SSTs (320s)
SAT_SST_Step_04_scale_data(P);

% Step 5. Calculate anomalies for scaled SATs (350s)
SAT_SST_Step_05_calculate_anomalies_for_scaled_SAT(P);

% Step 6. Calculate data to be plotted (240s)
SAT_SST_Step_06_post_processing(P);

% The whole process takes about 40 minutes to run on a 2019 Macbook Pro.